Pupil size light reflex to a light test is a potential test to determine a person is under the influence of THC and be able to be use as a field sobriety measures. The goals of the analysis to:
Pupil size light reflex to light does not habituated to THC use (smoking).
Time for smoking to post test maybe an important variable to model b/c:
There is a circadian pattern to pupil size (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7445830/).
Currently this field does not use FDA, so any FDA in topic area may be publishable
Data on pupil size during light reflex test. Pupil size was extracted at the image level based on image analysis techniques. Each test was performed simultaneously on right and left eye before and after THC use (smoking).
ps <- read.csv(file.path(data_folder, ps_folder, "pupil_data_with_demo_20220926.csv"))
ps$demo_gender <- factor(ps$demo_gender,
levels = c(1,2),
labels = c("Male", "Female"))
ps$user_cat <- NA
ps$user_cat[ps$user_type == "non-user"] <- 0
ps$user_cat[ps$user_type == "occasional"] <- 1
ps$user_cat[ps$user_type == "daily"] <- 2
ps$user_cat <- factor(ps$user_cat,
levels = c(0,1,2),
labels = c("non-user",
"occasional",
"daily"))
ps$tp <- NA
ps$tp[ps$time == "pre2"] <- 0
ps$tp[ps$time == "post"] <- 1
ps$tp <- factor(ps$tp,
levels = c(0,1),
labels = c("pre2", "post"))
# str(ps)
## -- Not needed with Ben's revised file missing frame numbers recorded
# #impute frame number
# for(i in 2:(nrow(ps)-1)){
# if(is.na(ps$frame[i]) & is.na(ps$frame[i+1] - ps$frame[i-1])){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) == 2){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(if(is.na(ps$frame[i]) & (ps$frame[i+1] - ps$frame[i-1]) > 2){
# if(abs(ps$percent_change[i] - ps$percent_change[i-1]) <
# abs(ps$percent_change[i+1] - ps$percent_change[i])){
# ps$frame[i] <- ps$frame[i-1]+1
# }else(ps$frame[i] <- ps$frame[i+1]-1)
# }
# )
# )
# }
# total number of subjects
length(unique(ps$subject_id))
## [1] 101
# subject level data
pt.df <- unique(ps[, c("subject_id", "tp", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "thc")])
# # Demographics Table by User Category
# table1(~ demo_age + demo_weight + demo_height + demo_gender|user_cat,
# data = pt.df[pt.df$tp == "pre2", ])
# number of subjects by timepoint (pre/post)
table(pt.df$tp)
##
## pre2 post
## 99 95
There are more subjects in total than the by time point. Indicating incomplete data with some subjects missing “pre” and others missing “post” measurement.
Add scalar features for each participant-time point
scalar.feat <- read.csv(file.path(data_folder, ps_folder, "scalars_trim.csv"),
header = T, stringsAsFactors = F)
scalar.feat$subject_id <- substr(scalar.feat$timeid, 1, 7)
scalar.feat$tp <- substr(scalar.feat$timeid, 8, 11)
scalar.featR <- scalar.feat[scalar.feat$eye == "Right", ]
pt.df <- merge(pt.df, scalar.featR[, c("subject_id", "tp", "min_constriction",
"duration", "avg_end_percent", "slope",
"AUC", "eye")],
by = c("subject_id", "tp"))
Reshape participant demog data to preserve pre and post THC levels and scalar features
pt.df.w <- reshape(pt.df,
timevar = "tp",
idvar = c("subject_id", "user_cat", "demo_age",
"demo_weight", "demo_height", "demo_gender", "eye"),
direction = "wide")
pt.df.w$thc.post[pt.df.w$user_cat == "non-user" & is.na(pt.df.w$thc.post)] <- 0
pt.df.w$thc_chg <- pt.df.w$thc.post - pt.df.w$thc.pre2
pt.df.w$BMI <- pt.df.w$demo_weight*703/(pt.df.w$demo_height)^2
pt.df.w$min_constriction_chg <- pt.df.w$min_constriction.post - pt.df.w$min_constriction.pre2
pt.df.w$AUC_chg <- pt.df.w$AUC.post - pt.df.w$AUC.pre2
pt.df.w$duration_chg <- pt.df.w$duration.post - pt.df.w$duration.pre2
pt.df.w$avg_end_percent_chg <- pt.df.w$avg_end_percent.post - pt.df.w$avg_end_percent.pre2
pt.df.w$slope_chg <- pt.df.w$slope.post - pt.df.w$slope.pre2
## Need to work on; time formatted in Excel file.
testTimes <- read.xlsx(file.path(data_folder, ps_folder, "All SafetyScan Times_23Aug2022_revSG.xlsx"),
sheet = "Raw")
testTimes$pre_safetyscan_date_convert <- as.Date(testTimes$pre_safetyscan_date, origin = "1899-12-30")
testTimes$pre_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$pre_safetyscan_time_hr, ":", testTimes$pre_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_start_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_start_time_hr, ":", testTimes$consump_start_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$consump_end_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$consump_end_time_hr, ":", testTimes$consump_end_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$post_safetyscan_time_convert <- as.POSIXct(paste0(testTimes$pre_safetyscan_date_convert, " ", testTimes$post_safetyscan_time_hr, ":", testTimes$post_safetyscan_time_min, ":", "00"), format = "%Y-%m-%d %H:%M:%S")
testTimes$postConsumpTimeToTest <- as.numeric(difftime(testTimes$post_safetyscan_time_convert,
testTimes$consump_end_time_convert,
units = "mins"))
testTimes$remove <- ifelse(testTimes$subject_id == "001-056" & is.na(testTimes$postConsumpTimeToTest), 1, 0)
testTimes <- testTimes[testTimes$remove == 0, ]
pt.df <- merge(pt.df.w, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
ps <- merge(ps, testTimes[, c("subject_id", "postConsumpTimeToTest")],
by = "subject_id")
non_user.id <- pt.df$subject_id[pt.df$user_cat == "non-user"]
# View(testTimes[testTimes$subject_id %in% non_user.id, ])
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~ demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest + min_constriction_chg + AUC_chg + duration_chg + slope_chg + avg_end_percent_chg|user_cat,
data = pt.df)
| non-user (N=32) |
occasional (N=36) |
daily (N=33) |
Overall (N=101) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.9 (4.90) | 31.6 (4.98) | 33.5 (5.69) | 32.6 (5.21) |
| Median [Min, Max] | 33.5 [25.7, 42.2] | 30.1 [25.1, 41.9] | 32.6 [25.4, 45.3] | 32.3 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 171 (48.5) | 173 (33.4) | 176 (33.1) | 173 (38.4) |
| Median [Min, Max] | 165 [85.0, 284] | 170 [105, 240] | 175 [113, 250] | 170 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.83) | 69.6 (3.60) | 68.1 (3.52) | 68.6 (4.04) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [59.0, 76.0] | 69.0 [59.0, 78.0] |
| BMI | ||||
| Mean (SD) | 25.5 (4.89) | 25.0 (4.02) | 26.7 (4.42) | 25.7 (4.46) |
| Median [Min, Max] | 24.5 [16.6, 34.2] | 24.5 [16.9, 32.6] | 25.8 [18.9, 34.9] | 25.1 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (40.6%) | 23 (63.9%) | 18 (54.5%) | 54 (53.5%) |
| Female | 19 (59.4%) | 13 (36.1%) | 15 (45.5%) | 47 (46.5%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.20 (9.71) | 29.9 (31.9) | 11.7 (21.9) |
| Median [Min, Max] | 0 [0, 0] | 5.73 [0, 46.6] | 17.8 [1.32, 124] | 3.93 [0, 124] |
| Missing | 2 (6.3%) | 7 (19.4%) | 8 (24.2%) | 17 (16.8%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 64.2 (6.52) | 61.2 (4.87) | 62.7 (5.95) |
| Median [Min, Max] | NA [NA, NA] | 64.0 [54.0, 84.0] | 60.0 [53.0, 74.0] | 62.0 [53.0, 84.0] |
| Missing | 32 (100%) | 0 (0%) | 0 (0%) | 32 (31.7%) |
| min_constriction_chg | ||||
| Mean (SD) | 6.22 (8.36) | 13.3 (10.5) | 11.4 (9.49) | 10.3 (9.87) |
| Median [Min, Max] | 7.35 [-14.6, 19.6] | 14.1 [-15.1, 34.4] | 9.92 [-1.72, 35.8] | 11.1 [-15.1, 35.8] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| AUC_chg | ||||
| Mean (SD) | 3.06 (9.51) | 6.63 (7.82) | 7.67 (7.78) | 5.71 (8.56) |
| Median [Min, Max] | 4.21 [-21.6, 21.2] | 5.36 [-13.5, 23.5] | 5.46 [-1.82, 36.0] | 5.36 [-21.6, 36.0] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| duration_chg | ||||
| Mean (SD) | -6.48 (58.0) | -16.4 (78.2) | -3.96 (42.9) | -9.29 (61.9) |
| Median [Min, Max] | -10.0 [-145, 152] | -8.00 [-219, 198] | -2.00 [-105, 108] | -7.00 [-219, 198] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| slope_chg | ||||
| Mean (SD) | -0.0239 (0.0348) | -0.0398 (0.0384) | -0.0323 (0.0523) | -0.0321 (0.0420) |
| Median [Min, Max] | -0.0224 [-0.0959, 0.0727] | -0.0321 [-0.136, 0.0321] | -0.0259 [-0.176, 0.0372] | -0.0290 [-0.176, 0.0727] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
| avg_end_percent_chg | ||||
| Mean (SD) | 0.687 (9.11) | 0.836 (5.84) | 4.54 (9.08) | 1.89 (8.17) |
| Median [Min, Max] | 1.02 [-26.8, 20.7] | 0.627 [-14.6, 13.7] | 2.70 [-5.32, 42.3] | 1.30 [-26.8, 42.3] |
| Missing | 3 (9.4%) | 6 (16.7%) | 8 (24.2%) | 17 (16.8%) |
I’m looking for any sharp declines over 10 frames after the 150th frame and then visualizing those trajectories to determine if there might be misalignment of the light test start frame.
ps$lagPctChg <- c(rep(0, 10), diff(ps$percent_change, lag = 10))
ps$lagID <- dplyr::lag(ps$subject_id, 10)
ps$lagtime <- dplyr::lag(ps$time, 10)
ps$lagEye <- dplyr::lag(ps$eye, 10)
ps$lagPctChg <- ifelse(ps$subject_id == ps$lagID & ps$time == ps$lagtime & ps$eye == ps$lagEye,
ps$lagPctChg, 0)
summary(ps$lagPctChg[ps$frame > 150])
hist(ps$lagPctChg[ps$frame > 150], breaks = 1000)
ps.150 <- ps[ps$frame > 150 & ps$eye == "Right", ]
pot.misaligned <- unique(ps.150[ps.150$lagPctChg <= -5, c("subject_id", "time", "user_type", "eye")])
pot.misaligned <- pot.misaligned[!(is.na(pot.misaligned$subject_id)), ]
pot.misaligned$misaligned <- 1
misaligned <- merge(ps, pot.misaligned,
by= c("subject_id", "time", "user_type", "eye"),
all.y = T)
mis.right <- misaligned[misaligned$eye == "Right", ]
misAlign.id <- unique(misaligned$subject_id)
for(i in misAlign.id){
print(ggplot(mis.right[mis.right$subject_id == i, ],
aes(x=frame, y = percent_change,
group = subject_id, color = i))+
geom_line()+
facet_grid(rows = vars(subject_id), cols = vars(tp))+
labs(title = paste0("Potential MisAligned Subject: ", i))+
theme_bw())
}
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_109.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-109", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_060.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-060", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned ")+
# theme_bw()
# dev.off()
### New alignment view
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_007.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-007", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: start at 2nd bump?")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_033.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-033", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_038.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-038", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Odd pattern")+
# theme_bw()
# dev.off()
#
# jpeg(file.path(ps_folder, "Potential_Misaligned_001_043.jpg"),
# width = 12, height = 5, units = "in", res = 300)
# ggplot(mis.right[mis.right$subject_id == "001-043", ],
# aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
# geom_line()+
# facet_grid(rows = vars(subject_id), cols = vars(tp))+
# labs(title = "Potential MisAligned: Both look odd, but mainly check pre; maybe review video")+
# theme_bw()
# dev.off()
Remove Outliers
## Remove known outliers
ps$outliers <- 0
# ps$outliers[ps$subject_id == "001-109" & ps$tp == "pre2"] <- 1 # Ben revised
ps$outliers[ps$subject_id == "001-060" & ps$tp == "post"] <- 1
ps <- ps[ps$outliers == 0, ]
Plot of Pupil Size and Percent Change for “PRE” data by Eye and User Category
pre.df <- ps[ps$tp == "pre2", ]
ggplot(pre.df, aes(x=frame, y = pupil_size, group = subject_id))+
geom_line(alpha = 0.5, show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(pre.df, aes(x=frame, y = percent_change, group = subject_id))+
geom_line(alpha = 0.5, show.legend = FALSE)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot of PRE/POST for Right Eye
right.df <- ps[ps$eye == "Right", ]
ggplot(right.df, aes(x=frame, y = pupil_size, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title ="Pupil Size by Pre/Post and THC use category")+
theme_bw()
ggplot(right.df, aes(x=frame, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, , alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(tp))+
labs(title = "Percent Change by Pre/Post and THC use category")+
theme_bw()
Plots of Left and Right Eye for “POST” data. One pt has negative values for pupil size.
post.df <- ps[ps$tp == "post", ]
ggplot(post.df, aes(x=frame, y = pupil_size, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title ="Pupil Size by Eye and THC use category")+
theme_bw()
ggplot(post.df, aes(x=frame, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat), cols = vars(eye))+
labs(title = "Percent Change by Eye and THC use category")+
theme_bw()
Plot the Mean and +/- 1 SD functions for the percent change in pupil light reflex data for the pre and post data by THC user category. (Added code to input frame numbers when NA).
right_0.df <- right.df[right.df$frame >= 0, c("subject_id", "tp", "user_cat",
"frame", "percent_change")]
right_0.df <- right_0.df[order(right_0.df$subject_id, right_0.df$tp, right_0.df$frame), ]
right_0.w <- reshape(right_0.df,
timevar = "frame",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
rownames(right_0.w) <- paste0(right_0.w$subject_id, "_", right_0.w$tp)
pct_chg <- names(right_0.w[, 4:602])
mean_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
mean_fxn.l <- reshape(mean_fxn,
varying = pct_chg,
v.names = "percent_change",
timevar = "frame",
times = pct_chg,
direction = "long")
mean_fxn.l$frame <- as.numeric(gsub("percent_change.", "", mean_fxn.l$frame))
rownames(mean_fxn.l) <- NULL
mean_fxn.l$id <- NULL
names(mean_fxn.l)[names(mean_fxn.l) == "Group.1"] <- "tp"
names(mean_fxn.l)[names(mean_fxn.l) == "Group.2"] <- "user_cat"
mean_fxn.l$grp <- paste0(mean_fxn.l$tp, "_", mean_fxn.l$user_cat)
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 449 row(s) containing missing values (geom_path).
sd_fxn <- as.data.frame(aggregate(right_0.w[, 4:602],
list(right_0.w$tp,
right_0.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
NA’s are when there’s no data in for that time point and user category. Min frame value for NA is 480.
Truncating to frame 400
right_0.fpca <- fpca.face(Y = as.matrix(right_0.w[, 4:404]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0.fpca.pve <- round(right_0.fpca$evalues/sum(right_0.fpca$evalues)*100, 2)
# right_0.fpca.pve
mu <- right_0.fpca$mu
right_sd <- sqrt(right_0.fpca$evalues)
right_Phi <- right_0.fpca$efunctions
df_plot <- data.frame(frame = seq(0, 400, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
plot_pc1 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc2 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc3 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc4 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
plot_pc5 <- ggplot(data = df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0.fpca.pve[5]))+
scale_color_manual(values = colors)+
theme_bw()
legend_postPC <- g_legend(plot_pc1)
blank <- grid.rect(gp=gpar(col="white"))
grid.arrange(plot_pc1+theme(legend.position = "none"),
plot_pc2+theme(legend.position = "none"),
plot_pc3+theme(legend.position = "none"),
plot_pc4+theme(legend.position = "none"),
plot_pc5+theme(legend.position = "none"),
blank,
legend_postPC,
layout_matrix = rbind(c(1,2,3,7), c(4,5, 6 , 7)),
widths = c(10, 10, 10, 3),
nrow = 2, ncol = 4)
PC1 plot: Individuals with low loading on PC1 (-2SD) have less constriction than the average and individuals with a high loading (+2SD) have more constriction. Rebound effect?
PC2 plot: Overall shape of trajectory & pattern of recovery
Plot individuals with high/low PC2 overall scores.
right_scores <- right_0.fpca$scores
q.95 <- quantile(right_scores[, 2], p = 0.95)
highPC2 <- rownames(right_scores)[right_scores[, 2] > q.95]
highPC2.df <- data.frame(subject_id = substr(highPC2, 1, 7),
tp = substr(highPC2, 9,12))
for(i in 1:nrow(highPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == highPC2.df$subject_id[i] & right_0.df$tp == highPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for high PC2 score --", "Subject:", highPC2.df$subject_id[i], "Timepoint:", highPC2.df$tp[i]))+
theme_bw())
}
q.05 <- quantile(right_scores[, 2], p = 0.05)
lowPC2 <- rownames(right_scores)[right_scores[, 2] < q.05]
lowPC2.df <- data.frame(subject_id = substr(lowPC2, 1, 7),
tp = substr(lowPC2, 9,12))
for(i in 1:nrow(lowPC2.df)){
plot.df <- right_0.df[right_0.df$subject_id == lowPC2.df$subject_id[i] & right_0.df$tp == lowPC2.df$tp[i], ]
print(ggplot(plot.df, aes(x=frame, y = percent_change, group = subject_id, color = subject_id))+
geom_line(show.legend = FALSE)+
labs(title = paste("Percent Change for low PC2 score --", "Subject:", lowPC2.df$subject_id[i], "Timepoint:", lowPC2.df$tp[i]))+
theme_bw())
}
Make sure datasets have the same participants and are truncated at frame 400
right_0.post <- right_0.df[right_0.df$tp == "post", ]
right_0.post <- right_0.post[right_0.post$frame <= 400, ]
post.ids <- unique(right_0.post$subject_id)
right_0.post <- right_0.post[order(right_0.post$subject_id, right_0.post$frame), ]
right_0.post$frame_char <- str_pad(right_0.post$frame, width = 3, side = c("left"), pad = "0")
right_0.post.w <- reshape(right_0.post[, c("subject_id","tp", "user_cat", "frame_char", "percent_change")],
timevar = "frame_char",
idvar = c("subject_id", "tp", "user_cat"),
direction = "wide")
var.order <- c(names(right_0.post.w)[1:3], sort(names(right_0.post.w)[4:length(names(right_0.post.w))]))
right_0.post.w <- right_0.post.w[, var.order]
var.names <- names(right_0.post.w[4:ncol(right_0.post.w)])
right_0.post <- reshape(right_0.post.w,
varying = var.names,
v.names = "percent_change",
timevar = "frame",
times = 0:400,
idvar = c("subject_id", "tp", "user_cat"),
direction = "long")
right_0.pt <- reshape(right_0.df[right_0.df$frame <= 400,],
timevar = "tp",
idvar = c("subject_id", "user_cat", "frame"),
direction = "wide")
right_0.pt$wiPctChg <- right_0.pt$percent_change.post - right_0.pt$percent_change.pre2
right_0.pt <- right_0.pt[, c("subject_id", "user_cat", "frame", "wiPctChg")]
right_0.pt <- right_0.pt[order(right_0.pt$subject_id, right_0.pt$frame), ]
right_0.pt$frame_char <- str_pad(right_0.pt$frame, width = 3, side = c("left"), pad = "0")
right_0.pt.w <- reshape(right_0.pt[, c("subject_id","user_cat", "frame_char", "wiPctChg")],
timevar = "frame_char",
idvar = c("subject_id", "user_cat"),
direction = "wide")
var.order2 <- c(names(right_0.pt.w)[1:2], sort(names(right_0.pt.w)[3:length(names(right_0.pt.w))]))
right_0.pt.w <- right_0.pt.w[, var.order2]
# remove rows where all data is missing (e.g. pt didn't have both pre and post)
allMissing <- rowSums(is.na(right_0.pt.w[, 3:403]))
rowsAllMissing <- names(allMissing)[allMissing == 401]
right_0.pt.w <- right_0.pt.w[!(rownames(right_0.pt.w) %in% rowsAllMissing), ]
rownames(right_0.pt.w) <- paste0(right_0.pt.w$subject_id, "_", right_0.pt.w$user_cat)
analytic.N <- post.ids[post.ids %in% right_0.pt.w$subject_id]
## Filter all datasets to analytic sample
right_0.df <- right_0.df[right_0.df$subject_id %in% analytic.N,]
right_0.post <- right_0.post[right_0.post$subject_id %in% analytic.N, ]
right_0.post.w <- right_0.post.w[right_0.post.w$subject_id %in% analytic.N ,]
row.names(right_0.post.w) <- right_0.post.w$subject_id
right_0.pt <- right_0.pt[right_0.pt$subject_id %in% analytic.N, ]
right_0.pt.w <- right_0.pt.w[right_0.pt.w$subject_id %in% analytic.N, ]
row.names(right_0.pt.w) <- right_0.pt.w$subject_id
No Consumption end time recorded for non-users
# Demographics Table by User Category
table1(~demo_age + demo_weight+ demo_height+BMI + demo_gender + thc_chg + postConsumpTimeToTest + min_constriction_chg + AUC_chg + duration_chg + slope_chg + avg_end_percent_chg|user_cat,
data = pt.df[pt.df$subject_id %in% analytic.N, ])
| non-user (N=29) |
occasional (N=30) |
daily (N=25) |
Overall (N=84) |
|
|---|---|---|---|---|
| demo_age | ||||
| Mean (SD) | 32.3 (4.70) | 31.1 (4.75) | 32.8 (5.71) | 32.0 (5.02) |
| Median [Min, Max] | 31.1 [25.7, 42.2] | 29.7 [25.1, 41.9] | 32.0 [25.4, 45.3] | 31.1 [25.1, 45.3] |
| demo_weight | ||||
| Mean (SD) | 167 (48.8) | 169 (34.0) | 180 (29.8) | 172 (38.7) |
| Median [Min, Max] | 162 [85.0, 284] | 165 [105, 240] | 175 [125, 250] | 169 [85.0, 284] |
| demo_height | ||||
| Mean (SD) | 68.0 (4.97) | 69.5 (3.84) | 68.5 (3.40) | 68.7 (4.16) |
| Median [Min, Max] | 67.0 [60.0, 78.0] | 69.5 [61.0, 76.0] | 69.0 [63.0, 76.0] | 69.0 [60.0, 78.0] |
| BMI | ||||
| Mean (SD) | 24.9 (4.72) | 24.5 (3.96) | 27.1 (4.26) | 25.4 (4.41) |
| Median [Min, Max] | 23.9 [16.6, 34.1] | 24.3 [16.9, 32.5] | 26.8 [19.0, 34.9] | 24.7 [16.6, 34.9] |
| demo_gender | ||||
| Male | 13 (44.8%) | 20 (66.7%) | 16 (64.0%) | 49 (58.3%) |
| Female | 16 (55.2%) | 10 (33.3%) | 9 (36.0%) | 35 (41.7%) |
| thc_chg | ||||
| Mean (SD) | 0 (0) | 8.20 (9.71) | 29.9 (31.9) | 11.9 (22.0) |
| Median [Min, Max] | 0 [0, 0] | 5.73 [0, 46.6] | 17.8 [1.32, 124] | 3.96 [0, 124] |
| Missing | 0 (0%) | 1 (3.3%) | 0 (0%) | 1 (1.2%) |
| postConsumpTimeToTest | ||||
| Mean (SD) | NA (NA) | 63.9 (6.26) | 60.2 (3.78) | 62.2 (5.57) |
| Median [Min, Max] | NA [NA, NA] | 63.5 [54.0, 84.0] | 60.0 [53.0, 67.0] | 62.0 [53.0, 84.0] |
| Missing | 29 (100%) | 0 (0%) | 0 (0%) | 29 (34.5%) |
| min_constriction_chg | ||||
| Mean (SD) | 6.22 (8.36) | 13.3 (10.5) | 11.4 (9.49) | 10.3 (9.87) |
| Median [Min, Max] | 7.35 [-14.6, 19.6] | 14.1 [-15.1, 34.4] | 9.92 [-1.72, 35.8] | 11.1 [-15.1, 35.8] |
| AUC_chg | ||||
| Mean (SD) | 3.06 (9.51) | 6.63 (7.82) | 7.67 (7.78) | 5.71 (8.56) |
| Median [Min, Max] | 4.21 [-21.6, 21.2] | 5.36 [-13.5, 23.5] | 5.46 [-1.82, 36.0] | 5.36 [-21.6, 36.0] |
| duration_chg | ||||
| Mean (SD) | -6.48 (58.0) | -16.4 (78.2) | -3.96 (42.9) | -9.29 (61.9) |
| Median [Min, Max] | -10.0 [-145, 152] | -8.00 [-219, 198] | -2.00 [-105, 108] | -7.00 [-219, 198] |
| slope_chg | ||||
| Mean (SD) | -0.0239 (0.0348) | -0.0398 (0.0384) | -0.0323 (0.0523) | -0.0321 (0.0420) |
| Median [Min, Max] | -0.0224 [-0.0959, 0.0727] | -0.0321 [-0.136, 0.0321] | -0.0259 [-0.176, 0.0372] | -0.0290 [-0.176, 0.0727] |
| avg_end_percent_chg | ||||
| Mean (SD) | 0.687 (9.11) | 0.836 (5.84) | 4.54 (9.08) | 1.89 (8.17) |
| Median [Min, Max] | 1.02 [-26.8, 20.7] | 0.627 [-14.6, 13.7] | 2.70 [-5.32, 42.3] | 1.30 [-26.8, 42.3] |
post.user <- ggplot(right_0.post, aes(x=frame, y = percent_change, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat))+
labs(title ="POST data percent change by THC use category")+
theme_bw()
Calculate the difference (post-pre) within subjects and plot by THC user category
wi.user <- ggplot(right_0.pt, aes(x=frame, y = wiPctChg, group = subject_id))+
geom_line(show.legend = FALSE, alpha = 0.5)+
facet_grid(rows = vars(user_cat))+
labs(title ="Within subject difference in percent change by THC use category")+
theme_bw()
grid.arrange(post.user, wi.user, nrow = 1)
Non-user plot seems “centered” around 0 but still showing
heterogeneity. Lots of heterogeneity across user-groups, but a mostly
positive difference.
mean_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:403],
list(right_0.pt.w$user_cat),
FUN = function(x) mean(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:403]
mean_fxn.pt.l <- reshape(mean_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
mean_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", mean_fxn.pt.l$frame))
rownames(mean_fxn.pt.l) <- NULL
mean_fxn.pt.l$id <- NULL
names(mean_fxn.pt.l)[names(mean_fxn.pt.l) == "Group.1"] <- "user_cat"
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
Difference in trajectories (post-pre), show a distinct difference between non-user and both user groups (up to frame 200), but the differences might not be significant. Harder to distinguish between the occasional and daily user trajectories.
# Check to make sure there are significant numbers in each user category
table(right_0.pt.w$user_cat)
##
## non-user occasional daily
## 29 30 25
sd_fxn.pt <- as.data.frame(aggregate(right_0.pt.w[, 3:403],
list(right_0.pt.w$user_cat),
FUN = function(x) sd(x, na.rm = T)))
wiPctChg <- names(right_0.pt.w)[3:403]
sd_fxn.pt.l <- reshape(sd_fxn.pt,
varying = wiPctChg,
v.names = "wi_percent_change",
timevar = "frame",
times = wiPctChg,
direction = "long")
sd_fxn.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", sd_fxn.pt.l$frame))
rownames(sd_fxn.pt.l) <- NULL
sd_fxn.pt.l$id <- NULL
names(sd_fxn.pt.l)[names(sd_fxn.pt.l) == "Group.1"] <- "user_cat"
sd_fxn.pt.l$wi_percent_change_neg <- -1*sd_fxn.pt.l$wi_percent_change
ggplot(sd_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
geom_line(aes(x=frame, y = wi_percent_change_neg, group = user_cat, color = user_cat),
linetype = "dashed")+
labs(title ="+/- 1 SD Within subject difference in percent change by THC use category")+
theme_bw()
## number of missing data points in each column
missingnessCol <- apply(right_0.pt.w[, 3:403],
MARGIN = 2,
FUN = function(x) sum(is.na(x)))
sum(missingnessCol == 0) ## 140 columns without any missing data
## [1] 143
sum(missingnessCol == 84) ## 109 columns with all missing data
## [1] 0
sum(missingnessCol <= 68) ## 438 columns where at least 80% of participants have data
## [1] 401
hist(missingnessCol, breaks = 30)
missing.df <- data.frame(frame = seq(0, 400, by =1),
missingness = round(missingnessCol/nrow(right_0.pt.w)*100, 2),
stringsAsFactors = F)
ggplot(missing.df, aes(x=frame, y= missingness))+
geom_line()
Truncate within person difference data to frame 400 where missingness
reaches 50%.
Try to visualize missing data in within person data frame
wi_pct_chg <- names(right_0.pt.w)[3:403]
right_0.pt.l <- reshape(right_0.pt.w,
varying = wi_pct_chg,
v.names = "wi_pct_chg_diff",
timevar = 'frame',
times = wi_pct_chg,
direction = "long")
right_0.pt.l$frame <- as.numeric(gsub("wiPctChg.", "", right_0.pt.l$frame))
rownames(right_0.pt.l) <- NULL
right_0.pt.l$id <- NULL
right_0.pt.l$notMissing <- ifelse(is.na(right_0.pt.l$wi_pct_chg_diff), 0, 1)
ggplot(right_0.pt.l, aes(x = as.factor(frame), y = subject_id, fill = as.factor(notMissing)))+
geom_raster(alpha = 0.8)+
scale_fill_manual(values = c("black", 'white'),
labels = c("Missing", "Present"))+
theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust = 1))
right_0_wi.fpca <- fpca.face(Y = as.matrix(right_0.pt.w[, 3:403]), pve = 0.95, knots = 30, var = T)
# str(right_0.fpca)
# plot_shiny(right_0.fpca)
right_0_wi.fpca.pve <- round(right_0_wi.fpca$evalues/sum(right_0_wi.fpca$evalues)*100, 2)
mu <- right_0_wi.fpca$mu
right_sd <- sqrt(right_0_wi.fpca$evalues)
right_Phi <- right_0_wi.fpca$efunctions
wi.df_plot <- data.frame(frame = seq(0, 400, by = 1),
mu = mu,
errband1 = 2*right_Phi[, 1]*right_sd[1],
errband2 = 2*right_Phi[, 2]*right_sd[2],
errband3 = 2*right_Phi[, 3]*right_sd[3],
errband4 = 2*right_Phi[, 4]*right_sd[4],
errband5 = 2*right_Phi[, 5]*right_sd[5],
errband6 = 2*right_Phi[, 6]*right_sd[6])
colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
wi.plot_pc1 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC1:", "% Var Explained:", right_0_wi.fpca.pve[1]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc2 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC2:", "% Var Explained:", right_0_wi.fpca.pve[2]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc3 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC3:", "% Var Explained:", right_0_wi.fpca.pve[3]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc4 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband4, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband4, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC4:", "% Var Explained:", right_0_wi.fpca.pve[4]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc5 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband5, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband5, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC5:", "% Var Explained:", right_0_wi.fpca.pve[5]))+
scale_color_manual(values = colors)+
theme_bw()
wi.plot_pc6 <- ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband6, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband6, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = paste("PC6:", "% Var Explained:", right_0_wi.fpca.pve[6]))+
scale_color_manual(values = colors)+
theme_bw()
legend_wiPC <- g_legend(wi.plot_pc1)
grid.arrange(wi.plot_pc1+theme(legend.position = "none"),
wi.plot_pc2+theme(legend.position = "none"),
wi.plot_pc3+theme(legend.position = "none"),
wi.plot_pc4+theme(legend.position = "none"),
wi.plot_pc5+theme(legend.position = "none"),
wi.plot_pc6+theme(legend.position = "none"),
legend_wiPC,
layout_matrix = rbind(c(1,2,3, 7), c(4, 5, 6, 7)),
widths = c(10, 10, 10, 3),
nrow = 2, ncol = 4)
PC1: Individuals with high scores on PC1 have a weaker minimal constriction at post compared to pre. Individuals with low scores on PC1 have a stronger minimal constriction at post compared to pre.
PC2: Individuals with high loading on PC2 have a weaker initial constriction at post and a stronger rebound dilation past the 200th frame. Individuals with a low loading on PC2 have a stronger initial constriction at paost and a weaker rebound dilation past the 200th frame.
wi.scores <- as.data.frame(right_0_wi.fpca$scores)
names(wi.scores) <- c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6")
wi.scores$subject_id <- rownames(wi.scores)
pt.wi.scores <- merge(wi.scores, pt.df,
by = "subject_id",
all.x = T)
pt.wi.scores$smoker <- ifelse(pt.wi.scores$user_cat %in% c("daily", "occasional"),1, 0)
# pc_ind_plots <- function(scores.df, raw.df, pc_plot.df, q, pc= "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change"){
# q.pc <- quantile(scores.df[, pc], probs = q)
# q.ids <- scores.df[scores.df[, pc] > q.pc, id]
#
# q.df <- pc_plot.df[pc_plot.df[, id] %in% q.ids, ]
#
# colors <- c("Pop.Mean" = "black", "+2SD" = "red", "-2SD" = "blue")
#
# print(ggplot(data = pc_plot.df, aes(x=frame, y = mu))+
# geom_line(aes(color = "Pop.Mean"))+
# geom_line(aes(x = frame, y = mu+ pc_plot.df[, 2+pc.val], color = "+2SD"))+
# geom_line(aes(x = frame, y = mu- pc_plot.df[, 2+pc.val], color = "-2SD"))+
# labs(x = "frame",
# y = "Percent Change",
# color = "Legend",
# title = paste0("Individuals in the ", q*100,"th Percentile of PC1 scores"))+
# geom_line(data = q.df, aes(x=frame, y = pc_plot.var, group = id))+
# #scale_color_manual(values = colors)+
# theme_bw())
#
# for(i in q.ids){
# print(ggplot(data = raw.df[raw.df[, id] == i, ],
# aes(x = frame, y = raw.plot.var, group = tp, color = tp))+
# geom_line()+
# labs(title = paste0(i, " Pre/Post: ", q*100 , "th Percentile PC1 scores"))+
# theme_bw())
# }
# }
# pc_ind_plots(scores.df = wi.scores,
# raw.df = right_0.df,
# pc_plot.df = right_0.pt,
# q = 0.95, pc = "PC1", id = "subject_id", pc.val = 1, pc_plot.var = "wiPctChg", raw.plot.var = "percent_change")
q.95 <- quantile(wi.scores$PC1, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC1 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC1 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 116 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC1 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC1, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC1 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband1, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband1, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC1 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 79 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC1 scores"))+
theme_bw())
}
q.95 <- quantile(wi.scores$PC2, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC2 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC2 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 18 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC2 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC2, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC2 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband2, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband2, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC2 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 14 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC2 scores"))+
theme_bw())
}
q.95 <- quantile(wi.scores$PC3, probs = 0.95)
pctile95.wi <- wi.scores$subject_id[wi.scores$PC3 > q.95]
pctile95.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile95.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the Top 5th Percentile of PC3 scores")+
geom_line(data = pctile95.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 23 row(s) containing missing values (geom_path).
for(i in pctile95.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: Top 5th Percentile PC3 scores"))+
theme_bw())
}
q.05 <- quantile(wi.scores$PC3, probs = 0.05)
pctile05.wi <- wi.scores$subject_id[wi.scores$PC3 < q.05]
pctile05.wi.df <- right_0.pt[right_0.pt$subject_id %in% pctile05.wi, ]
ggplot(data = wi.df_plot, aes(x = frame, y = mu))+
geom_line(aes(color = "Pop.Mean"))+
geom_line(aes(x = frame, y = mu+errband3, color = "+2SD"))+
geom_line(aes(x = frame, y = mu-errband3, color = "-2SD"))+
labs(x = "frame",
y = "Percent Change",
color = "Legend",
title = "Individuals in the 5th Percentile of PC3 scores")+
geom_line(data = pctile05.wi.df, aes(x=frame, y = wiPctChg, group = subject_id, color = subject_id))+
scale_color_manual(values = colors)+
theme_bw()
## Warning: Removed 21 row(s) containing missing values (geom_path).
for(i in pctile05.wi){
print(ggplot(data = right_0.df[right_0.df$subject_id == i, ],
aes(x = frame, y = percent_change, group = tp, color = tp))+
geom_line()+
labs(title = paste0(i, " Pre/Post: 5th Percentile PC3 scores"))+
theme_bw())
}
### INVESTIGATE BUMPS IN MEAN Function
## Pre/Post Across subjects
ggplot(mean_fxn.l, aes(x=frame, y = percent_change, group = grp,
color = user_cat))+
geom_line()+
xlim(50, 200)+
facet_grid(rows = vars(tp))+
labs(title = "Average Percent Change by Pre/Post and THC use category")+
theme_bw()
## Warning: Removed 2688 row(s) containing missing values (geom_path).
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "pre2", 103:105]
## percent_change.99 percent_change.100 percent_change.101
## 001-003_pre2 -39.625323 -39.841378 NA
## 001-004_pre2 -20.384680 -20.509286 -20.636732
## 001-007_pre2 -29.058010 -29.728903 -30.372171
## 001-009_pre2 -12.603789 -12.393281 -12.182258
## 001-015_pre2 -27.535413 -27.419396 -27.277259
## 001-029_pre2 -22.530667 -22.422026 -22.324030
## 001-030_pre2 -20.085366 -19.904482 -19.730264
## 001-031_pre2 -26.584228 -26.659914 -26.707617
## 001-032_pre2 -25.426802 -25.607501 -25.790114
## 001-037_pre2 -10.775800 -10.604402 -10.431064
## 001-038_pre2 -14.966576 -14.582329 -14.220651
## 001-039_pre2 -8.984096 -8.698685 -8.442025
## 001-040_pre2 -13.956051 -13.961272 -13.965766
## 001-041_pre2 -22.808730 -23.151100 -23.486850
## 001-042_pre2 -22.433860 -22.334270 -22.256730
## 001-043_pre2 -11.999190 -11.625390 -11.267720
## 001-045_pre2 -25.080110 -24.484760 -23.910470
## 001-046_pre2 -39.336980 -39.228640 -39.117800
## 001-047_pre2 -8.596590 -8.537555 -8.481596
## 001-049_pre2 -20.259780 -20.116580 -19.984260
## 001-050_pre2 -29.778200 -29.870190 -29.993170
## 001-053_pre2 -31.737980 -31.777050 -31.811150
## 001-054_pre2 -19.115900 -19.003080 -18.907630
## 001-055_pre2 -22.066560 -22.701870 -23.326340
## 001-062_pre2 -5.447275 -5.356276 -5.269542
## 001-076_pre2 -43.356910 -43.094070 -42.798500
## 001-077_pre2 -9.460922 -9.416607 -9.337826
## 001-095_pre2 -19.089510 -18.898660 -18.708520
## 001-097_pre2 -9.101851 -9.338060 -9.588896
## 001-099_pre2 -32.345480 -32.517180 -32.697860
right_0.w[right_0.w$user_cat == "non-user" & right_0.w$tp == "post", 101:103]
## percent_change.97 percent_change.98 percent_change.99
## 001-003_post -27.591476 -27.753056 -27.913264
## 001-004_post -5.328511 -5.257832 -5.198141
## 001-005_post -21.188532 -21.013709 -20.832539
## 001-007_post -19.029031 -18.738419 -18.445158
## 001-009_post -15.151704 -15.268272 -15.431165
## 001-015_post -23.783261 -23.671938 -23.558792
## 001-029_post -15.408908 -15.334405 -15.258121
## 001-030_post -12.152430 -12.088330 -12.022690
## 001-031_post -32.895740 -32.672950 -32.429868
## 001-032_post -17.701130 -17.526118 -17.369644
## 001-037_post -7.289497 -7.295840 -7.301405
## 001-038_post -14.546489 -14.328061 -14.119133
## 001-039_post -6.231028 -6.300429 -6.357287
## 001-040_post -9.851204 -9.732384 -9.612723
## 001-041_post -11.854880 -11.783820 -11.723060
## 001-042_post -23.842370 -23.864700 -23.870620
## 001-043_post -9.485482 -9.529095 -9.569430
## 001-045_post -46.722200 -46.643350 -46.578230
## 001-046_post -25.109600 -24.943490 -24.785340
## 001-047_post -5.673392 -5.631184 -5.614140
## 001-049_post -12.073010 -12.144500 -12.199830
## 001-050_post -32.832790 -32.971580 -33.069770
## 001-053_post -18.049450 -17.935310 -17.814510
## 001-054_post -26.886900 -27.036700 -27.191480
## 001-055_post -12.280830 -12.115960 -11.958690
## 001-062_post -8.462570 -8.366234 -8.281283
## 001-076_post -51.176420 -51.349010 NA
## 001-077_post -3.110021 -3.120474 -3.135075
## 001-097_post -14.264840 -13.988710 -13.710370
## 001-099_post -15.625370 -15.639890 -15.662830
## Within person plots
ggplot(mean_fxn.pt.l, aes(x=frame, y = wi_percent_change, group = user_cat, color = user_cat))+
geom_line()+
xlim(50, 200)+
labs(title ="Average Within subject difference in percent change by THC use category")+
theme_bw()
## Warning: Removed 750 row(s) containing missing values (geom_path).
## Mean function Spike in Occassional user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "occasional", 113:119]
## wiPctChg.111 wiPctChg.112 wiPctChg.113 wiPctChg.114 wiPctChg.115 wiPctChg.116
## 2 10.67527 10.68097 10.6863 9.873832 10.76751 10.69195
## wiPctChg.117
## 2 10.154
right_0.pt.w[right_0.pt.w$user_cat == "occasional", 116:119]
## wiPctChg.113 wiPctChg.114 wiPctChg.115 wiPctChg.116
## 001-006 11.470139 11.501074 11.5256630 11.5441500
## 001-011 14.108477 13.644011 13.2101213 12.8103992
## 001-014 21.189571 21.310733 21.4515436 21.6131807
## 001-021 -1.335619 -1.051763 -0.7900428 -0.5539591
## 001-026 9.817465 9.754613 9.6877039 9.6172871
## 001-033 -4.603834 -4.590481 -4.5847367 -4.5874266
## 001-061 -3.022040 -3.113458 -3.2049960 -3.2953840
## 001-066 -3.293890 -2.992660 -2.6954000 -2.4078300
## 001-068 -2.218310 -2.464190 -2.7147200 NA
## 001-069 10.412403 10.375112 10.3261620 10.2674190
## 001-070 -6.025890 -6.368030 -6.6819500 -6.9642800
## 001-073 1.316810 1.098830 0.8742100 0.6410700
## 001-074 34.601040 NA 34.4445900 34.2126300
## 001-079 1.507878 1.521615 1.5352410 1.5492310
## 001-098 13.725968 13.731739 13.7472930 13.7733770
## 001-103 13.532641 13.324760 13.1186220 12.9151740
## 001-104 12.403860 12.785490 13.1440300 13.4777300
## 001-105 6.078600 5.559630 5.0371800 4.5128900
## 001-106 31.750678 31.497264 31.2436990 30.9937400
## 001-107 13.086510 13.184770 13.2843100 13.3851400
## 001-108 0.405090 0.617610 0.8447500 1.0849700
## 001-109 25.211590 25.655630 26.0890100 26.5101800
## 001-110 20.702280 20.564430 20.4366200 20.3182100
## 001-111 24.671963 24.636681 24.5955730 NA
## 001-112 5.577910 5.439300 5.2952300 5.1482100
## 001-113 4.349280 4.602660 4.8828900 5.1874800
## 001-114 7.697860 8.313380 NA 9.0936100
## 001-116 8.925529 9.561639 10.2337310 10.9367770
## 001-117 26.002984 25.939192 25.8583240 25.7613320
## 001-118 22.542060 22.301552 22.0631290 21.8292470
## Mean function Spike in Daily user group
mean_fxn.pt[mean_fxn.pt$Group.1 == "daily", 133:149]
## wiPctChg.131 wiPctChg.132 wiPctChg.133 wiPctChg.134 wiPctChg.135 wiPctChg.136
## 3 9.586988 9.276883 9.74442 9.978503 9.75998 9.534391
## wiPctChg.137 wiPctChg.138 wiPctChg.139 wiPctChg.140 wiPctChg.141 wiPctChg.142
## 3 10.07775 10.1046 10.12753 9.170939 10.31649 10.8234
## wiPctChg.143 wiPctChg.144 wiPctChg.145 wiPctChg.146 wiPctChg.147
## 3 11.62495 9.737888 10.18799 10.1544 10.17143
right_0.pt.w[right_0.pt.w$user_cat == "daily", 143:146]
## wiPctChg.140 wiPctChg.141 wiPctChg.142 wiPctChg.143
## 001-002 14.8259138 NA 14.7153791 14.601849
## 001-008 14.7401984 14.739642 14.7359309 14.729108
## 001-010 11.2224673 11.478313 11.7425889 12.013432
## 001-012 NA 33.415378 33.3457661 33.274294
## 001-013 1.9709700 NA 2.0212900 2.017464
## 001-018 0.5882232 0.730210 0.8743158 1.019096
## 001-022 19.2040607 19.108362 19.0395858 18.998221
## 001-024 3.7929460 3.778025 3.7627633 NA
## 001-027 -1.8169757 -1.785007 -1.7402544 -1.684846
## 001-044 10.4374320 10.366936 10.2923040 10.213352
## 001-056 3.5775660 3.610453 3.6402340 3.666943
## 001-063 20.1435100 20.458040 20.7316600 20.963450
## 001-064 2.5358200 2.713260 NA 3.009630
## 001-067 11.3182940 11.248294 11.1776250 11.105867
## 001-075 14.7930500 15.038380 15.2525100 15.432590
## 001-081 20.1668400 19.957810 19.7584800 19.568360
## 001-083 6.3090300 6.543770 6.7744200 NA
## 001-085 2.4490400 2.503400 NA 2.673020
## 001-086 15.5868000 15.468720 15.3217200 15.147030
## 001-088 18.6508590 18.639526 18.5982260 18.530297
## 001-090 12.1688900 12.830380 13.4777600 14.106620
## 001-091 3.1351800 2.730030 2.3344100 1.952300
## 001-093 10.3844600 10.249920 10.0865200 9.897910
## 001-094 -11.3709900 -11.587410 -11.7885300 NA
## 001-096 15.2889500 15.042820 14.7835500 14.512860
Jagged changes in the mean function lines seems to stem from missing data at those frames in the data set. The subjects per user-group is between 25 - 30, so missing data for one individual has a noticeable effect on mean function line.
auc.df$AUC <- round(as.numeric(auc.df$AUC), 3)
knitr::kable(auc.df)
| Label | AUC |
|---|---|
| Post AUC PCs | 0.680 |
| Post AUC Scalar Feat | 0.675 |
| W/I Diff AUC PCS | 0.712 |
| W/I Diff AUC Scalar Feat | 0.681 |
pt.scores$male <- ifelse(pt.scores$demo_gender == "Male", 1, 0)
pt.scores$non_user <- ifelse(pt.scores$user_cat == "non-user", 1, 0)
pt.scores$daily <- ifelse(pt.scores$user_cat == "daily", 1, 0)
pt.scores$occasional <- ifelse(pt.scores$user_cat == "occasional", 1, 0)
pt.wi.scores$male <- ifelse(pt.wi.scores$demo_gender == "Male", 1, 0)
pt.wi.scores$non_user <- ifelse(pt.wi.scores$user_cat == "non-user", 1, 0)
pt.wi.scores$daily <- ifelse(pt.wi.scores$user_cat == "daily", 1, 0)
pt.wi.scores$occasional <- ifelse(pt.wi.scores$user_cat == "occasional", 1, 0)
Compare PCs to Scalar Features
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"min_constriction.post", "duration.post",
"avg_end_percent.post", "slope.post", "AUC.post")])
Compare PCs to demog Variables
chart.Correlation(pt.scores[, c("PC1", "PC2", "PC3", "PC4",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
Compare Scalar Features to demog variables
chart.Correlation(pt.scores[, c("min_constriction.post", "duration.post",
"avg_end_percent.post", "slope.post", "AUC.post",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
post.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6937 -4.3733 -0.2022 3.0898 12.7335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.011200 0.540594 59.215 <2e-16 ***
## PC1 -0.004610 0.003968 -1.162 0.249
## PC2 -0.021284 0.011154 -1.908 0.060 .
## PC3 -0.016538 0.017203 -0.961 0.339
## PC4 0.014568 0.025662 0.568 0.572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.954 on 79 degrees of freedom
## Multiple R-squared: 0.07415, Adjusted R-squared: 0.02727
## F-statistic: 1.582 on 4 and 79 DF, p-value: 0.1874
post.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -73.320 -29.334 -5.384 21.773 104.274
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.67606 4.21236 40.755 <2e-16 ***
## PC1 -0.01093 0.03092 -0.354 0.725
## PC2 -0.11136 0.08691 -1.281 0.204
## PC3 -0.11178 0.13405 -0.834 0.407
## PC4 0.26641 0.19996 1.332 0.187
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 38.6 on 79 degrees of freedom
## Multiple R-squared: 0.05167, Adjusted R-squared: 0.003657
## F-statistic: 1.076 on 4 and 79 DF, p-value: 0.374
post.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6857 -2.8319 -0.1469 2.5711 9.4680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.696451 0.459842 149.391 <2e-16 ***
## PC1 -0.002576 0.003375 -0.763 0.448
## PC2 0.007556 0.009488 0.796 0.428
## PC3 -0.002375 0.014633 -0.162 0.871
## PC4 0.015293 0.021829 0.701 0.486
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.214 on 79 degrees of freedom
## Multiple R-squared: 0.02156, Adjusted R-squared: -0.02799
## F-statistic: 0.4351 on 4 and 79 DF, p-value: 0.7829
post.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.507 -3.132 -0.388 2.570 8.975
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.4102876 0.4696332 54.107 <2e-16 ***
## PC1 -0.0002663 0.0034467 -0.077 0.9386
## PC2 -0.0213975 0.0096897 -2.208 0.0301 *
## PC3 -0.0104636 0.0149449 -0.700 0.4859
## PC4 0.0366708 0.0222933 1.645 0.1040
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.304 on 79 degrees of freedom
## Multiple R-squared: 0.09344, Adjusted R-squared: 0.04754
## F-statistic: 2.036 on 4 and 79 DF, p-value: 0.09736
post.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.732 -11.162 -8.510 0.007 111.022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.90021 2.45021 4.857 6.02e-06 ***
## PC1 0.01353 0.01788 0.757 0.452
## PC2 0.01691 0.05025 0.336 0.737
## PC3 0.06716 0.07775 0.864 0.390
## PC4 0.05484 0.11565 0.474 0.637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.32 on 78 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.0205, Adjusted R-squared: -0.02973
## F-statistic: 0.4082 on 4 and 78 DF, p-value: 0.8022
post.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
summary(post.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3683 -3.2531 -0.4093 2.7255 19.1823
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.615706 0.791539 79.106 <2e-16 ***
## PC1 -0.007270 0.006655 -1.092 0.280
## PC2 -0.029361 0.018224 -1.611 0.113
## PC3 -0.022862 0.031660 -0.722 0.474
## PC4 -0.025775 0.039435 -0.654 0.516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.542 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.08243, Adjusted R-squared: 0.009024
## F-statistic: 1.123 on 4 and 50 DF, p-value: 0.3563
post.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.scores)
summary(post.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1805 -1.1975 0.7542 1.0493 1.3021
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.379276 0.231843 1.636 0.102
## PC1 -0.002915 0.001839 -1.585 0.113
## PC2 0.005303 0.005526 0.960 0.337
## PC3 -0.003385 0.007914 -0.428 0.669
## PC4 0.015188 0.011811 1.286 0.198
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 114.10 on 83 degrees of freedom
## Residual deviance: 108.63 on 79 degrees of freedom
## AIC: 118.63
##
## Number of Fisher Scoring iterations: 4
Compare PCs to Scalar Features
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg")])
Compare PCs to demog Variables
chart.Correlation(pt.wi.scores[, c("PC1", "PC2", "PC3", "PC4", "PC5", "PC6",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
Compare Scalar Features to demog variables
chart.Correlation(pt.wi.scores[, c("min_constriction_chg", "AUC_chg", "duration_chg",
"avg_end_percent_chg", "slope_chg",
"demo_age","male", "thc.post", "thc_chg", "BMI",
"postConsumpTimeToTest", "smoker",
"non_user", "daily", "occasional")])
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
## Warning in cor(x, y, use = use, method = method): the standard deviation is zero
## Warning in cor(x, y): the standard deviation is zero
wi.age.m <- lm(demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.age.m)
##
## Call:
## lm(formula = demo_age ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9995 -3.4901 -0.7814 2.7934 13.5714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.003200 0.548106 58.389 <2e-16 ***
## PC1 0.001955 0.003700 0.528 0.599
## PC2 -0.009988 0.007301 -1.368 0.175
## PC3 -0.007621 0.010019 -0.761 0.449
## PC4 -0.015410 0.013429 -1.147 0.255
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.022 on 79 degrees of freedom
## Multiple R-squared: 0.04855, Adjusted R-squared: 0.0003792
## F-statistic: 1.008 on 4 and 79 DF, p-value: 0.4085
wi.wt.m <- lm(demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.wt.m)
##
## Call:
## lm(formula = demo_weight ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -89.656 -25.088 -3.594 19.550 98.696
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.56887 4.10349 41.810 <2e-16 ***
## PC1 0.05572 0.02770 2.011 0.0477 *
## PC2 -0.06786 0.05466 -1.241 0.2181
## PC3 -0.08717 0.07501 -1.162 0.2487
## PC4 -0.13892 0.10054 -1.382 0.1709
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.6 on 79 degrees of freedom
## Multiple R-squared: 0.1004, Adjusted R-squared: 0.05481
## F-statistic: 2.203 on 4 and 79 DF, p-value: 0.07613
wi.ht.m <- lm(demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.ht.m)
##
## Call:
## lm(formula = demo_height ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.040 -3.253 0.114 2.200 8.972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.677459 0.454393 151.141 <2e-16 ***
## PC1 0.002129 0.003067 0.694 0.490
## PC2 -0.006215 0.006053 -1.027 0.308
## PC3 -0.004798 0.008306 -0.578 0.565
## PC4 -0.015177 0.011133 -1.363 0.177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.164 on 79 degrees of freedom
## Multiple R-squared: 0.04493, Adjusted R-squared: -0.003432
## F-statistic: 0.929 on 4 and 79 DF, p-value: 0.4515
wi.bmi.m <- lm(BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.bmi.m)
##
## Call:
## lm(formula = BMI ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7985 -2.7078 -0.9608 2.5375 8.8148
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.406366 0.473378 53.670 <2e-16 ***
## PC1 0.006410 0.003196 2.006 0.0483 *
## PC2 -0.005754 0.006306 -0.912 0.3643
## PC3 -0.009462 0.008653 -1.094 0.2775
## PC4 -0.010155 0.011598 -0.876 0.3840
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.338 on 79 degrees of freedom
## Multiple R-squared: 0.07923, Adjusted R-squared: 0.03261
## F-statistic: 1.699 on 4 and 79 DF, p-value: 0.1584
wi.thc.m <- lm(thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.thc.m)
##
## Call:
## lm(formula = thc_chg ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.890 -11.063 -6.853 -0.940 109.585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.822803 2.423855 4.878 5.56e-06 ***
## PC1 0.023976 0.016377 1.464 0.147
## PC2 0.013886 0.032105 0.433 0.667
## PC3 0.045267 0.044172 1.025 0.309
## PC4 0.007122 0.059057 0.121 0.904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.07 on 78 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.04192, Adjusted R-squared: -0.007213
## F-statistic: 0.8532 on 4 and 78 DF, p-value: 0.496
wi.consump.m <- lm(postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
summary(wi.consump.m)
##
## Call:
## lm(formula = postConsumpTimeToTest ~ PC1 + PC2 + PC3 + PC4, data = pt.wi.scores)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6238 -3.2794 -0.3045 2.3010 20.4482
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.777539 0.783866 78.811 <2e-16 ***
## PC1 0.003760 0.005617 0.669 0.506
## PC2 0.011790 0.011537 1.022 0.312
## PC3 0.024449 0.014627 1.671 0.101
## PC4 0.005318 0.017625 0.302 0.764
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.554 on 50 degrees of freedom
## (29 observations deleted due to missingness)
## Multiple R-squared: 0.07827, Adjusted R-squared: 0.004526
## F-statistic: 1.061 on 4 and 50 DF, p-value: 0.3854
wi.male.m <- glm(male ~ PC1 + PC2 + PC3 + PC4, family = "binomial", data = pt.wi.scores)
summary(wi.male.m)
##
## Call:
## glm(formula = male ~ PC1 + PC2 + PC3 + PC4, family = "binomial",
## data = pt.wi.scores)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.625 -1.184 0.667 1.008 1.676
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3860208 0.2349366 1.643 0.1004
## PC1 0.0025219 0.0016332 1.544 0.1225
## PC2 -0.0068806 0.0035517 -1.937 0.0527 .
## PC3 -0.0063204 0.0047558 -1.329 0.1838
## PC4 0.0002342 0.0059885 0.039 0.9688
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 114.10 on 83 degrees of freedom
## Residual deviance: 106.34 on 79 degrees of freedom
## AIC: 116.34
##
## Number of Fisher Scoring iterations: 4
Demographic Variables:
Results:
Regression models regressed demographic variables on the 4 PCs for the post data.
Regression models regressed demographic variables on the 6 PCs for the WPD data.
\[ \begin{aligned} Y_{ij}(t) &= f_0(t) + f_1(t)*I(user = occasional) + f_2(t)*I(user = daily) + b_i(t) + \epsilon_i(t)\\ & b_i(t) \sim GP(0, \Sigma_b) \\ & \epsilon_i(t) \sim N(0, \sigma_{\epsilon}^2) \\ &= \sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)*I(user = occasional) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i)*I(user = daily) + \sum_{k=1}^K \xi_{ik}\phi_k(t_i) + \epsilon_i(t)\\ \end{aligned} \]
\[ \begin{aligned} Y_{ij}(t) &= f_0(t)\\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) \\ &= \Phi_0\xi_0 \end{aligned} \]
\[ \begin{aligned} Y_{ij}(t) &= f_0(t) + f_1(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{1k}\phi_{1k}(t_i)\\ &= \Phi_0\xi_0 + \Phi_1\xi_1\\ &= [ \Phi_0, \Phi_1 ] \xi \end{aligned} \]
\[ \begin{aligned} Y_{ij}(t) &= f_0(t) + f_2(t) \\ &=\sum_{k=1}^K \xi_{0k}\phi_{0k}(t_i) + \sum_{k=1}^K \xi_{2k}\phi_{2k}(t_i) \\ &= \Phi_0\xi_0 + \Phi_2\xi_2\\ &= [ \Phi_0, \Phi_2 ] \xi \end{aligned} \]
pt.analytic.df <- pt.df[pt.df$subject_id %in% analytic.N, ]
pt.analytic.df$occasional <- ifelse(pt.analytic.df$user_cat == "occasional", 1, 0)
pt.analytic.df$daily <- ifelse(pt.analytic.df$user_cat == "daily", 1, 0)
pt.analytic.df <- pt.analytic.df[order(pt.analytic.df$subject_id), ]
right_0.post.w <- right_0.post.w[order(right_0.post.w$subject_id), ]
post_pffr.df <- data.frame("subject_id" = as.factor(pt.analytic.df$subject_id),
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"pct_chg" = I(as.matrix(right_0.post.w[, c(4:404)])))
m.post_pffr <- pffr(pct_chg ~ occ_user + daily_user +s(subject_id, bs="re"),
data = post_pffr.df,
algorithm = "bam",
method = "fREML",
discrete = T)
summary(m.post_pffr)
par(mfrow = c(1, 3))
plot(m.post_pffr)
What does NA in the output mean? Did not find NA in either the matrix or other variables
# head(right_0.df)
right_0.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
right_0.gam.df$sind <- right_0.gam.df$frame/400
# head(right_0.gam.df)
m.post_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") + s(sind, by=occasional, k=30, bs = "cr") + s(sind, by=daily, k=30, bs = "cr"),
data = right_0.gam.df, method = "REML")
## Create a matrix of residuals
post_gam.resid <- cbind(right_0.gam.df[!(is.na(right_0.gam.df$percent_change)), c("subject_id", "frame")], m.post_gam$residuals)
names(post_gam.resid) <- c("subject_id", "frame", "resid")
post_gam.resid$frame_char <- str_pad(post_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_gam.resid[, c("subject_id", "frame_char", "resid")],
timevar = "frame_char",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
resid_names <- names(resid_mat)
resid_names <- sort(resid_names)
resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
right_0.gam.df <- merge(right_0.gam.df, Phi_mat,
by = "frame",
all.x = T)
right_0.gam.df <- right_0.gam.df[order(right_0.gam.df$subject_id, right_0.gam.df$frame), ]
right_0.gam.df$subject_id <- as.factor(right_0.gam.df$subject_id)
post_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = right_0.gam.df, discrete = TRUE)
summary(post_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2,
## bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id,
## by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3046 0.9192 -11.21 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.54 28.43 115.13 <2e-16 ***
## s(sind):occasional 18.58 21.89 70.05 <2e-16 ***
## s(sind):daily 18.61 21.94 35.47 <2e-16 ***
## s(subject_id):Phi1 82.23 84.00 22994.79 <2e-16 ***
## s(subject_id):Phi2 81.68 84.00 2568.87 <2e-16 ***
## s(subject_id):Phi3 82.23 84.00 877.76 <2e-16 ***
## s(subject_id):Phi4 81.06 84.00 1202.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.966 Deviance explained = 96.6%
## fREML = 63157 Scale est. = 2.6288 n = 32558
post_gam.coef <- post_gam.fri$coefficients
post_gam.cov <- vcov.gam(post_gam.fri)
df_pred_non <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_non <- predict(post_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_occ <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 1, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_occ <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")
df_pred_dly <- data.frame("sind" = unique(right_0.gam.df$sind), "occasional" = 0, "daily" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = right_0.gam.df$subject_id[1])
lpmat_dly <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(frame = rep(seq(0, 400), 3),
user = c(rep("non-user", 401), rep("occasional", 401),rep("daily", 401)),
mean = c(lpmat_non %*% post_gam.coef,
lpmat_occ %*% post_gam.coef, lpmat_dly %*% post_gam.coef),
se = c(sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_occ %*% post_gam.cov %*% t(lpmat_occ))),
sqrt(diag(lpmat_dly %*% post_gam.cov %*% t(lpmat_dly)))),
stringsAsFactors = F)
post_userProfile <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
# geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
# geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
labs(y = "Percent Change")+
theme_bw()
legend_userProfile <- g_legend(post_userProfile)
post_userProfile.se <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
labs(y = "")+
theme_bw()
post_userTraj <- grid.arrange(arrangeGrob(post_userProfile+theme(legend.position = "none"),
post_userProfile.se+theme(legend.position = "none"), nrow = 1),
legend_userProfile, nrow = 1,
widths = c(30, 6))
pred_f1 <- predict(post_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "terms")
pred_f2 <- predict(post_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(frame = seq(0, 400),
f0_hat = lpmat_non %*% post_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% post_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2],
f2_hat = pred_f2$fit[, 3],
f2_se = pred_f2$se.fit[, 3])
post_non_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f0_hat))+
geom_line()+
geom_line(aes(x = frame, y = f0_hat + 2*f0_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f0_hat - 2*f0_se), linetype = "longdash")+
labs(title = "Average Percent Change of a Non-user (Post)", ylab = "Percent Change")+
theme_bw()
post_occ_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f1_hat))+
geom_line()+
geom_line(aes(x = frame, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Occasional and Non-user (Post)",
y= "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
post_dly_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f2_hat))+
geom_line()+
geom_line(aes(x = frame, y = f2_hat + 2*f2_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f2_hat - 2*f2_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Daily and Non-user (Post)",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_occ_coef <- grid.arrange(ylab, posval, negval, post_occ_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
post_dly_coef <- grid.arrange(ylab, posval, negval, post_dly_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
Plot difference between daily and occasional user
f2_f1 <- (lpmat_dly - lpmat_occ) %*% post_gam.coef
f2_f1.se <- sqrt(diag((lpmat_dly - lpmat_occ) %*% post_gam.cov %*% t((lpmat_dly - lpmat_occ))))
f2_f1_diff_df <- data.frame(frame = seq(0, 400),
f2f1_diff = f2_f1,
f2f1_se = f2_f1.se)
ggplot(data = f2_f1_diff_df, aes(x=frame, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = frame, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
labs(title = "Difference between Average Daily vs Occasional User", ylab = "f2_hat - f1_hat")+
theme_bw()
post_dlyocc_plt <- ggplot(data = f2_f1_diff_df, aes(x=frame, y = f2f1_diff))+
geom_line()+
geom_line(aes(x = frame, y = f2f1_diff + 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f2f1_diff - 2*f2f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Daily vs Occasional User",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in Daily User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0, just = "bottom",
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in Daily User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_dlyocc_coef <- grid.arrange(ylab, posval, negval, post_dlyocc_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
post_dlyocc_coef
## TableGrob (2 x 17) "arrange": 4 grobs
## z cells name grob
## 1 1 ( 1- 2, 1- 1) arrange text[GRID.text.5806]
## 2 2 ( 1- 1, 2- 2) arrange text[GRID.text.5807]
## 3 3 ( 2- 2, 2- 2) arrange text[GRID.text.5808]
## 4 4 ( 1- 2, 3-17) arrange gtable[layout]
smoker.gam.df <- merge(right_0.post, pt.analytic.df,
by = c("subject_id", "user_cat"))
smoker.gam.df$sind <- smoker.gam.df$frame/400
smoker.gam.df$smoker <- ifelse(smoker.gam.df$user_cat == "non-user", 0, 1)
m.post_smoker_gam <- mgcv::gam(percent_change ~ s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr"),
data = smoker.gam.df, method = "REML")
## Create a matrix of residuals
post_smoker_gam.resid <- cbind(smoker.gam.df[!(is.na(smoker.gam.df$percent_change)), c("subject_id", "frame")], m.post_smoker_gam$residuals)
names(post_smoker_gam.resid) <- c("subject_id", "frame", "resid")
post_smoker_gam.resid$frame_char <- str_pad(post_smoker_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(post_smoker_gam.resid[, c("subject_id", "frame_char", "resid")],
timevar = "frame_char",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
resid_names <- names(resid_mat)
resid_names <- sort(resid_names)
resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
post_smoker_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(post_smoker_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, post_smoker_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
smoker.gam.df <- merge(smoker.gam.df, Phi_mat,
by = "frame",
all.x = T)
smoker.gam.df <- smoker.gam.df[order(smoker.gam.df$subject_id, smoker.gam.df$frame), ]
smoker.gam.df$subject_id <- as.factor(smoker.gam.df$subject_id)
post_smoker_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=30, bs="cr") +
s(sind, by=smoker, k=30, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = smoker.gam.df, discrete = TRUE)
summary(post_smoker_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 30, bs = "cr") + s(sind, by = smoker,
## k = 30, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.3228 0.9638 -10.71 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 27.31 28.29 113.71 <2e-16 ***
## s(sind):smoker 19.43 22.72 66.92 <2e-16 ***
## s(subject_id):Phi1 82.65 84.00 23988.93 <2e-16 ***
## s(subject_id):Phi2 82.26 84.00 2727.88 <2e-16 ***
## s(subject_id):Phi3 82.56 84.00 928.92 <2e-16 ***
## s(subject_id):Phi4 81.71 84.00 1282.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.6%
## fREML = 63270 Scale est. = 2.6509 n = 32558
Plot difference between smoker and non-smoker
post_smoker_gam.coef <- post_smoker_gam.fri$coefficients
post_smoker_gam.cov <- vcov.gam(post_smoker_gam.fri)
df_pred_non <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" =0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = smoker.gam.df$subject_id[1])
lpmat_non <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_smk <- data.frame("sind" = unique(smoker.gam.df$sind), "smoker" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = smoker.gam.df$subject_id[1])
lpmat_smk <- predict(post_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(frame = rep(seq(0, 400), 2),
user = c(rep("non-user", 401), rep("smoker", 401)),
mean = c(lpmat_non %*% post_smoker_gam.coef,
lpmat_smk %*% post_smoker_gam.coef),
se = c(sqrt(diag(lpmat_non %*% post_smoker_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_smk %*% post_smoker_gam.cov %*% t(lpmat_smk)))),
stringsAsFactors = F)
post_smkProfile <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "Percent Change")+
theme_bw()
legend_smkProfile <- g_legend(post_smkProfile)
post_smkProfile.se <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "")+
theme_bw()
post_smk <- grid.arrange(arrangeGrob(post_smkProfile+theme(legend.position = "none"),
post_smkProfile.se+theme(legend.position = "none"), nrow = 1),
legend_smkProfile, nrow = 1,
widths = c(30, 6))
# pred_f0 <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "iterms")
pred_f1 <- predict(post_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(frame = seq(0, 400),
f0_hat = lpmat_non %*% post_smoker_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% post_smoker_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2])
post_smk_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f1_hat))+
geom_line()+
geom_line(aes(x = frame, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Percent Change: Smoker and Non-smoker",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("Less Constriction in Smokers", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0, just = "bottom",
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("More Constriction in Smokers", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
post_smk_coef <- grid.arrange(ylab, posval, negval, post_smk_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
right_0.pt.w <- right_0.pt.w[order(right_0.pt.w$subject_id), ]
wi_pffr.df <- data.frame("subject_id" = pt.analytic.df$subject_id,
"occ_user" = pt.analytic.df$occasional,
"daily_user" = pt.analytic.df$daily,
"wi_pct_chg" = I(as.matrix(right_0.pt.w[, c(3:403)])))
wi_pffr.df$subject_id <- as.factor(wi_pffr.df$subject_id)
m.wi_pffr <- pffr(wi_pct_chg ~ occ_user + daily_user + s(subject_id, bs="re"),
data = wi_pffr.df,
algorithm = "bam",
method = "fREML",
discrete = T)
summary(m.wi_pffr)
par(mfrow = c(1,3))
plot(m.wi_pffr)
right_0.pt.gam.df <- merge(right_0.pt, pt.analytic.df,
by = c("subject_id", "user_cat"))
right_0.pt.gam.df$sind <- right_0.pt.gam.df$frame/400
m.wi_gam <- mgcv::gam(wiPctChg ~ s(sind, k=30, bs="cr") + s(sind, by=occasional, k=30, bs = "cr") + s(sind, by=daily, k=30, bs = "cr"),
data = right_0.pt.gam.df, method = "REML")
## Create a matrix of residuals
wi_gam.resid <- cbind(right_0.pt.gam.df[!(is.na(right_0.pt.gam.df$wiPctChg)), c("subject_id", "frame")], m.wi_gam$residuals)
names(wi_gam.resid) <- c("subject_id", "frame", "resid")
wi_gam.resid$frame_char <- str_pad(wi_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(wi_gam.resid[, c("subject_id", "frame_char", "resid")],
timevar = "frame_char",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
resid_names <- names(resid_mat)
resid_names <- sort(resid_names)
resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
wi_gam.fpca <- fpca.face(resid_mat, knots = 15)
wi_gam.fpca$evalues/sum(wi_gam.fpca$evalues)
## [1] 0.632634192 0.151444985 0.080422256 0.047792649 0.026613912 0.018366352
## [7] 0.015600706 0.010352858 0.007277261 0.005076642 0.004418187
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(wi_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, wi_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
right_0.pt.gam.df <- merge(right_0.pt.gam.df, Phi_mat,
by = "frame",
all.x = T)
right_0.pt.gam.df <- right_0.pt.gam.df[order(right_0.pt.gam.df$subject_id, right_0.pt.gam.df$frame), ]
right_0.pt.gam.df$subject_id <- as.factor(right_0.pt.gam.df$subject_id)
wi_gam.fri <- mgcv::bam(wiPctChg ~ s(sind, k=30, bs="cr") +
s(sind, by=occasional, k=30, bs = "cr") +
s(sind, by=daily, k=30, bs = "cr")+
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re")+
s(subject_id, by = Phi5, bs="re") + s(subject_id, by = Phi6, bs="re"),
method = "fREML", data = right_0.pt.gam.df, discrete = TRUE)
summary(wi_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## wiPctChg ~ s(sind, k = 30, bs = "cr") + s(sind, by = occasional,
## k = 30, bs = "cr") + s(sind, by = daily, k = 30, bs = "cr") +
## s(subject_id, by = Phi1, bs = "re") + s(subject_id, by = Phi2,
## bs = "re") + s(subject_id, by = Phi3, bs = "re") + s(subject_id,
## by = Phi4, bs = "re") + s(subject_id, by = Phi5, bs = "re") +
## s(subject_id, by = Phi6, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.069 1.039 3.916 9.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 18.76 21.67 27.45 <2e-16 ***
## s(sind):occasional 18.76 21.87 23.03 <2e-16 ***
## s(sind):daily 19.52 22.63 31.09 <2e-16 ***
## s(subject_id):Phi1 81.63 84.00 30859.17 <2e-16 ***
## s(subject_id):Phi2 82.03 84.00 18059.93 <2e-16 ***
## s(subject_id):Phi3 82.79 84.00 5517.60 <2e-16 ***
## s(subject_id):Phi4 82.47 84.00 2941.06 <2e-16 ***
## s(subject_id):Phi5 82.25 84.00 498.20 <2e-16 ***
## s(subject_id):Phi6 82.70 84.00 842.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.954 Deviance explained = 95.5%
## fREML = 71066 Scale est. = 4.5186 n = 31934
wi_gam.coef <- wi_gam.fri$coefficients
wi_gam.cov <- vcov.gam(wi_gam.fri)
df_pred_non <- data.frame("sind" = unique(right_0.pt.gam.df$sind), "occasional" = 0, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0, "Phi5" = 0, "Phi6" = 0,
"subject_id" = right_0.pt.gam.df$subject_id[1])
lpmat_non <- predict(wi_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_occ <- data.frame("sind" = unique(right_0.pt.gam.df$sind), "occasional" = 1, "daily" = 0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0, "Phi5" = 0, "Phi6" = 0,
"subject_id" = right_0.pt.gam.df$subject_id[1])
lpmat_occ <- predict(wi_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "lpmatrix")
df_pred_dly <- data.frame("sind" = unique(right_0.pt.gam.df$sind), "occasional" = 0, "daily" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0, "Phi5" = 0, "Phi6" = 0,
"subject_id" = right_0.pt.gam.df$subject_id[1])
lpmat_dly <- predict(wi_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(frame = rep(seq(0, 400), 3),
user = c(rep("non-user", 401), rep("occasional", 401),rep("daily", 401)),
mean = c(lpmat_non %*% wi_gam.coef,
lpmat_occ %*% wi_gam.coef, lpmat_dly %*% wi_gam.coef),
se = c(sqrt(diag(lpmat_non %*% wi_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_occ %*% wi_gam.cov %*% t(lpmat_occ))),
sqrt(diag(lpmat_dly %*% wi_gam.cov %*% t(lpmat_dly)))),
stringsAsFactors = F)
# ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
# geom_line()+
# geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
# geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
# theme_bw()
wi_userProfile <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "Within Person Difference in Percent Change")+
theme_bw()
legend_userProfile <- g_legend(wi_userProfile)
wi_userProfile.se <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "")+
theme_bw()
wi_userTraj <- grid.arrange(arrangeGrob(wi_userProfile+theme(legend.position = "none"),
wi_userProfile.se+theme(legend.position = "none"), nrow = 1),
legend_userProfile, nrow = 1,
widths = c(30, 6))
pred_f1 <- predict(wi_gam.fri, newdata=df_pred_occ, se.fit=TRUE, type = "terms")
pred_f2 <- predict(wi_gam.fri, newdata=df_pred_dly, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(frame = seq(0, 400),
f0_hat = lpmat_non %*% wi_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% wi_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2],
f2_hat = pred_f2$fit[, 3],
f2_se = pred_f2$se.fit[, 3])
# ggplot(data = pred_coef_df, aes(x=frame, y = f0_hat))+
# geom_line()+
# geom_line(aes(x = frame, y = f0_hat + 2*f0_se), linetype = "longdash")+
# geom_line(aes(x = frame, y = f0_hat - 2*f0_se), linetype = "longdash")+
# labs(title = "Average Response of a Non-user", ylab = "Within Person Difference in Percent Change")+
# theme_bw()
wi_occ_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f1_hat))+
geom_line()+
geom_line(aes(x = frame, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Within Person Difference (WPD): Occasional and Non-user",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
wi_dly_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f2_hat))+
geom_line()+
geom_line(aes(x = frame, y = f2_hat + 2*f2_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f2_hat - 2*f2_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Within Person Difference (WPD): Daily and Non-user",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in WPD in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("More WPD Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("Less WPD Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
wi_occ_coef <- grid.arrange(ylab, posval, negval, wi_occ_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
wi_dly_coef <- grid.arrange(ylab, posval, negval, wi_dly_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
This plot compares the difference between occasional and non-users for post and WPD data. In the post data there is significant positive difference in the occasional and non-users from frame 50 - 125 which would indicate that occasional user have less constriction during this time. In the WPD data there is a significant positive difference in the occasional and non-users from frame 10 - 175, indicating a greater difference in the difference between post and pre for the occasional vs non-user. Since the typical response has a positive difference between post and pre during this interval caused by less constriction at post compared to pre, we can say that there is even less constriction in post data of an occasional user compared to his/her pre than in a non-user.
These results are similar to the occassional vs non-user but less pronounced maybe indicating more variability in daily and non-users.
Differences in Within Person Difference between Daily and Occassional Users
Difference in Within Person Difference Smoker vs Non-smokers
wi.smoker.gam.df <- merge(right_0.pt, pt.analytic.df,
by = c("subject_id", "user_cat"))
wi.smoker.gam.df$sind <- wi.smoker.gam.df$frame/400
wi.smoker.gam.df$smoker <- ifelse(wi.smoker.gam.df$user_cat == "non-user", 0, 1)
m.wi_smoker_gam <- mgcv::gam(wiPctChg ~ s(sind, k=15, bs="cr") +
s(sind, by=smoker, k=15, bs = "cr"),
data = wi.smoker.gam.df, method = "REML")
## Create a matrix of residuals
wi_smoker_gam.resid <- cbind(wi.smoker.gam.df[!(is.na(wi.smoker.gam.df$wiPctChg)), c("subject_id", "frame")], m.wi_smoker_gam$residuals)
names(wi_smoker_gam.resid) <- c("subject_id", "frame", "resid")
wi_smoker_gam.resid$frame_char <- str_pad(wi_smoker_gam.resid$frame, width = 3, side = "left", pad = "0")
resid_mat <- reshape(wi_smoker_gam.resid[, c("subject_id", "frame_char", "resid")],
timevar = "frame_char",
idvar = "subject_id",
direction = "wide")
rownames(resid_mat) <- resid_mat$subject_id
resid_mat$subject_id <- NULL
resid_names <- names(resid_mat)
resid_names <- sort(resid_names)
resid_mat <- resid_mat[, resid_names]
resid_mat <- as.matrix(resid_mat)
wi_smoker_gam.fpca <- fpca.face(resid_mat, knots = 15)
#Create data frame of eigen functions and attach to original data
Phi_mat <- as.data.frame(wi_smoker_gam.fpca$efunctions)
colnames(Phi_mat) <- paste0("Phi", seq(1, wi_smoker_gam.fpca$npc))
Phi_mat$frame <- as.numeric(rownames(Phi_mat)) -1
wi.smoker.gam.df <- merge(wi.smoker.gam.df, Phi_mat,
by = "frame",
all.x = T)
wi.smoker.gam.df <- smoker.gam.df[order(wi.smoker.gam.df$subject_id, wi.smoker.gam.df$frame), ]
wi.smoker.gam.df$subject_id <- as.factor(wi.smoker.gam.df$subject_id)
wi_smoker_gam.fri <- mgcv::bam(percent_change ~
s(sind, k=15, bs="cr") +
s(sind, by=smoker, k=15, bs = "cr") +
s(subject_id, by = Phi1, bs="re") + s(subject_id, by = Phi2, bs="re")+
s(subject_id, by = Phi3, bs="re") + s(subject_id, by = Phi4, bs="re"),
method = "fREML", data = wi.smoker.gam.df, discrete = TRUE)
summary(wi_smoker_gam.fri)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## percent_change ~ s(sind, k = 15, bs = "cr") + s(sind, by = smoker,
## k = 15, bs = "cr") + s(subject_id, by = Phi1, bs = "re") +
## s(subject_id, by = Phi2, bs = "re") + s(subject_id, by = Phi3,
## bs = "re") + s(subject_id, by = Phi4, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.781 1.018 -0.768 0.443
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(sind) 13.41 13.71 205.0 <2e-16 ***
## s(sind):smoker 13.68 14.32 103.4 <2e-16 ***
## s(subject_id):Phi1 83.12 84.00 25028.8 <2e-16 ***
## s(subject_id):Phi2 81.48 84.00 7165.8 <2e-16 ***
## s(subject_id):Phi3 82.06 84.00 1630.2 <2e-16 ***
## s(subject_id):Phi4 80.94 84.00 3501.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.965 Deviance explained = 96.6%
## fREML = 62716 Scale est. = 2.6788 n = 32170
Plot difference between smoker and non-smoker
wi_smoker_gam.coef <- wi_smoker_gam.fri$coefficients
wi_smoker_gam.cov <- vcov.gam(wi_smoker_gam.fri)
df_pred_non <- data.frame("sind" = sort(unique(wi.smoker.gam.df$sind)), "smoker" =0,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = wi.smoker.gam.df$subject_id[1])
lpmat_non <- predict(wi_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "lpmatrix")
df_pred_smk <- data.frame("sind" = sort(unique(wi.smoker.gam.df$sind)), "smoker" = 1,
"Phi1" = 0, "Phi2" = 0, "Phi3" = 0,
"Phi4" = 0,
"subject_id" = wi.smoker.gam.df$subject_id[1])
lpmat_smk <- predict(wi_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "lpmatrix")
pred_df <- data.frame(frame = rep(seq(0, 400), 2),
user = c(rep("non-user", 401), rep("smoker", 401)),
mean = c(lpmat_non %*% wi_smoker_gam.coef,
lpmat_smk %*% wi_smoker_gam.coef),
se = c(sqrt(diag(lpmat_non %*% wi_smoker_gam.cov %*% t(lpmat_non))),
sqrt(diag(lpmat_smk %*% wi_smoker_gam.cov %*% t(lpmat_smk)))),
stringsAsFactors = F)
# ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
# geom_line()+
# geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
# geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
# theme_bw()
wi_smkProfile <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "Within Person Difference in Percent Change")+
theme_bw()
legend_smkProfile <- g_legend(wi_userProfile)
wi_smkProfile.se <- ggplot(data = pred_df, aes(x = frame, y = mean, group = user, col = user))+
geom_line()+
geom_line(aes(x=frame, y = mean + 2*se, group = user, col = user), linetype = "longdash")+
geom_line(aes(x=frame, y = mean - 2*se, group = user, col = user), linetype = "longdash")+
scale_y_continuous(limits = c(min(pred_df$mean - 2*pred_df$se),
max(pred_df$mean+2*pred_df$se)))+
labs(y = "")+
theme_bw()
wi_smk <- grid.arrange(arrangeGrob(wi_smkProfile+theme(legend.position = "none"),
wi_smkProfile.se+theme(legend.position = "none"), nrow = 1),
legend_smkProfile, nrow = 1,
widths = c(30, 6))
# pred_f0 <- predict(post_smoker_gam.fri, newdata=df_pred_non, se.fit=TRUE, type = "iterms")
pred_f1 <- predict(wi_smoker_gam.fri, newdata=df_pred_smk, se.fit=TRUE, type = "terms")
pred_coef_df <- data.frame(frame = seq(0, 400),
f0_hat = lpmat_non %*% wi_smoker_gam.coef,
f0_se = sqrt(diag(lpmat_non %*% wi_smoker_gam.cov %*% t(lpmat_non))),
f1_hat = pred_f1$fit[, 2],
f1_se = pred_f1$se.fit[, 2])
# ggplot(data = pred_coef_df, aes(x=frame, y = f0_hat))+
# geom_line()+
# geom_line(aes(x = frame, y = f0_hat + 2*f0_se), linetype = "longdash")+
# geom_line(aes(x = frame, y = f0_hat - 2*f0_se), linetype = "longdash")+
# labs(title = "Average Within Person Difference in Percent Change of a Non-user", ylab = "f0_hat")+
# theme_bw()
wi_smk_plt <- ggplot(data = pred_coef_df, aes(x=frame, y = f1_hat))+
geom_line()+
geom_line(aes(x = frame, y = f1_hat + 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = f1_hat - 2*f1_se), linetype = "longdash")+
geom_line(aes(x = frame, y = 0), col = "darkred")+
labs(title = "Difference in Within Person Difference (WPD) in Percent Change \nbetween Smoker and Non-smoker",
y = "")+
theme_bw()+
scale_x_continuous(expand = c(0, 0))+
theme(rect = element_rect(fill = "transparent"))
ylab <- textGrob(label = "Difference in WPD in Percent Change", rot = 90,
gp = gpar(fontsize = 12))
posval <- textGrob(label = sapply(strwrap("More WPD Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
negval <- textGrob(label = sapply(strwrap("Less WPD Constriction in User", width = 15, simplify = F),
paste, collapse = "\n"), rot = 0,
gp = gpar(fontsize = 8))
wi_smk_coef <- grid.arrange(ylab, posval, negval, wi_smk_plt,
ncol = 2, nrow = 2,
layout_matrix = rbind(c(1, 2, rep(4, 15)), c(1, 3, rep(4, 15))))
3.1 Comments for Ben